GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R"))

setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)

annot = get(load("Hemap_immunology_Annotations.Rdata"))

# gexp data
data=t(get(load("data9544_with_gene_symbols.RData")))
data=data[,colnames(data)%in%annot$GSM.identifier..sample.]

t.df=t.df[order(t.df[,3]),]
t.df=t.df[t.df$name%in%"Cancer_Myeloma",]
# rank patients by number of testis antigens expressed
# profiles
profile=get(load("/research/groups/sysgen/PROJECTS/HEMAP/HEMAP/dat2figs/revision_2018/petri/mixtureM_profile.Rdata"))
profile[profile==-1] = 0
profile2=profile[,colnames(profile)%in%annot$GSM.identifier..sample.]

# take only high expressed into account
profile2[data.matrix(data)<5]=0

expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
feat_class=expressed_testis_num
feat_class[expressed_testis_num==0]="0_Antigens"
feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1to4_Antigens"
feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5to6_Antigens"
feat_class[expressed_testis_num>=7]="over7_Antigens"

# plot CGA to hemap:

colorv=rep("grey75", length(annot$y))

colorf=colorRamp2(breaks = c(0, 2, 4, 6, 7),colors = c("grey75", "#f4acac", "#f77777", "#e23030", "#800000"))

Plot_color_vector=function(X, color, NAME=NULL, SIZE=0.8, TITLE="tSNE plot", PATH_OUTPUT=getwd(), peaks=NULL) {
  
  # if cluster centers are not defined as inputs, do not plot them.
  CLUSTER_CENTRE=ifelse(is.null(peaks), F, T)
  
  # Color vector
  datCol=color
  
  # Prepare input matrix for plotting.
  dat2show <- cbind(X$x, X$y)
  df=as.data.frame(dat2show)
  colnames(df) = c("X1","X2")
  
  # If for some reason we use a color vector which includes blanks,
  # create a color vector without them to be plotted at the front.
  # (Increases visibility of the "important" colored samples.)
  front=!datCol==""
  
  # Call actual plotting function.
  p=drawFig(df, CLUSTER_CENTRE, datCol, front, TITLE, SIZE, peaks)
  
  if(!is.null(NAME)){
    # Write out print quality figure as PDF.
    ggsave(paste0(PATH_OUTPUT, NAME, "_singlepage.pdf"), p, width = 210, height = 210, units = "mm", dpi=150)
  }
  return(p)
}

pdf("FigS6B_TSNE_CGA_hemap.pdf", width = 6.5, height = 6)
Plot_color_vector(X = data.frame("x"=annot$x, "y"=annot$y), color = colorf(expressed_testis_num), NAME = "FigS6B_TSNE_CGA_hemap.pdf")
dev.off()

# Only hemap MM
load("Hemap_MM_subtypes.Rdata")

# plot CGA to hemap:
expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
feat_class=expressed_testis_num
feat_class[expressed_testis_num==0]="0_Antigens"
feat_class[expressed_testis_num>=1&expressed_testis_num<=2]="1to2_Antigens"
feat_class[expressed_testis_num>=3&expressed_testis_num<=4]="3to4_Antigens"
feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5to6_Antigens"
feat_class[expressed_testis_num>=7]="over7_Antigens"


colorf=colorRamp2(breaks = c(0, 2, 4, 6, 7),colors = c("grey75", "#f4acac", "#f77777", "#e23030", "#800000"))

pdf("FigS6F_TSNE_CGA_hemap_MM_subtypes.pdf", width = 6.5, height = 6)
Plot_color_vector(X = data.frame("x"=coordinates.subtype$x, "y"=coordinates.subtype$y), color = colorf(expressed_testis_num[match(coordinates.subtype$ID, names(expressed_testis_num))]), NAME = "TSNE_CGA_hemap.pdf", SIZE = 4)
dev.off()